;SEARCH.ASM -- Assembly Source File, 1991-06-13, 28KB, 1,219 lines.
;Archive path: Tech Arsenal 1 / Tech Arsenal (Arsenal Computer).ISO / tek-13 /
; emac16ds.zip / SEARCH.ASM
;History:107,1
;Mon Apr 23 23:22:13 1990 Add \( and \) capability (won't work with *)
;Sun Apr 22 23:30:23 1990 omatch_NCCL bombed on the first failure to match.
;Sun Apr 22 23:12:21 1990 don't bomb locate on null classes.
;Wed Apr 11 22:55:42 1990 three alternations (\|) would cause a crash.
;Wed Apr 11 22:54:54 1990 \W should include newline.
;Wed Nov 29 23:58:27 1989 Add support for \|
;Tue Nov 07 23:45:44 1989 match newlines in character classes.
;Mon Nov 06 00:40:16 1989 try to make backwards regexp searches work.
;Sat Nov 05 22:05:14 1988 let CR LF match LINENEW.
;10-08-88 08:48:54 add \n to regexp search.
;09-26-88 21:23:42 add case translation for character classes.
;08-19-88 23:36:40 closure didn't work because omatch iterated on matching.
;08-13-88 22:12:46 try forwards again.
;07-24-88 16:42:24 BOL and EOL match BOB and EOB respectively.
;07-21-88 22:49:18 add optimized search backwards.
;07-20-88 00:15:38 too late at night to continue...
;07-20-88 00:02:35 optimize forward searches.
;07-19-88 23:38:07 use the right omatch_chr for both regexps and literals.
;07-19-88 00:51:06 initialize the case table.
;07-18-88 21:20:18 don't increment di twice in omatch_NCCL
;07-18-88 00:04:34 replace bad patterns with "".
;07-17-88 23:15:23 Check for topbot right after incrementing di.
;07-17-88 22:55:12 search *at* the end_ptr (check for end_ptr after searching).
;07-17-88 18:54:53 when searching backwards, don't search past right_ptr.
;07-17-88 10:59:27 save di around omatch()
;07-17-88 10:42:13 omatch_CHR was incrementing di even if it didn't match.
;06-06-88 23:58:09 change the regexp chars to match Gnu's.
;07-06-87 06:55:31 Use botbot for eof, not LINENEW
include memory.def
;Private variables of the search / regular expression module.
data segment byte public
;One-field structures.  They exist only so that a memory operand can be given
; an explicit size: es:[di].w is a word access, [si].b is a byte access.
b_struc struc
b db ?
b_struc ends
w_struc struc
w dw ?
w_struc ends
;The compiled pattern and its size (both are defined in another module).
;set_pattern and makepat fill outpat with a sequence of word offsets of
; omatch_XXX routines, each followed by that routine's parameters.
extrn outpat: byte
extrn OUTPATSIZE: abs
inpat_ptr dw ? ;beginning of input pattern (or of the current \| alternative).
direction dw ? ;routine to increment di in correct direction (inc_di or dec_di).
scan_char dw ? ;routine to scan for a character (scan_char_literal or scan_char_fold).
end_ptr dw ? ;end of region we're searching.
right_ptr dw ? ;rightmost end of region we're searching.
clo_si dw ? ;saved pointer for closure.
last_ptr dw ? ;pointer to last character matched (stored by omatch_EOS).
which_chr dw ? ;which omatch_CHR to use (omatch_CHR or omatch_NCHR).
this_pattern dw ? ;->this pattern (for closure).
last_pattern dw ? ;->previous pattern (for closure), -1 if none.
last_or dw ? ;->last or pointer.
paren_count dw ? ;number of unmatched parentheses.
extrn textseg: word
;set_pattern calls through init_case.  The first call reaches init_case_table,
; which re-points init_case at a bare ret so the table is only built once.
init_case dw init_case_table
case_ignore_table db 256 dup(?) ;xlat table; identity except 'a'..'z' -> 'A'..'Z'.
data ends
;The text buffer segment.  The code in this file treats the text as two
; pieces: a pointer that reaches topbot continues at bottop, omatch_BOB
; tests for toptop and omatch_EOB tests for botbot.
bufseg segment public
extrn toptop: word
extrn topbot: word
extrn bottop: word
extrn botbot: word
bufseg ends
code segment byte public
assume cs:code, ds:data
public slowly
extrn get_mark: near, set_mark_si: near
extrn get_syntax: near
public search
search:
;Search the text between two marks for the pattern compiled in outpat.
;enter with ch=start mark, cl=end mark, dh=first mark, dl=last mark.
;start searching at mark ch. If the string is found, then return the
; beginning in mark dh, and the end in mark dl, and cy=0. If the string
; wasn't found, return cy=1.
;The search runs forwards when the start pointer is below the end pointer,
; and backwards otherwise.
;bp is destroyed.  Whatever slowly, get_mark and set_mark_si destroy is
; destroyed as well.
;NOTE(review): the second half reads last_ptr through es (assume es:data), so
; the caller's es presumably addresses the data segment -- confirm.
push dx ;save the first, last marks.
push es
mov es,textseg
assume es:bufseg
push ds ;save ds
push es
pop ds
assume ds:bufseg ;for get_mark
mov al,cl ;get the end mark.
push cx
call get_mark
mov bp,si ;save a copy of it.
pop cx
mov al,ch ;get the start mark.
call get_mark
pop ds ;restore ds
assume ds:data
mov end_ptr,bp ;save a copy of the end.
mov right_ptr,bp ;assume we go forwards, so the end is rightmost.
mov direction,offset inc_di
cmp si,end_ptr ;start>=end?
jb search_4 ;no. (doesn't matter if they're equal)
mov direction,offset dec_di ;yes, go in reverse direction.
mov right_ptr,si ;yes, remember that start is rightmost.
search_4:
mov di,si ;get the pointer to our string.
call slowly
pop es ;pops don't disturb the carry returned by slowly.
assume es:data
pop dx
jc search_1 ;not found.
push ds
mov ds,textseg ;for set_mark_si
assume ds:bufseg
mov al,dh
mov si,di
call set_mark_si ;set the first mark.
mov si,last_ptr ;ds is bufseg here; last_ptr is reached through es.
mov al,dl
call set_mark_si ;set the last mark.
pop ds
assume ds:data
clc ;return a match.
ret
search_1:
stc ;return no match.
ret
assume ds:data, es:bufseg
scan_char_literal:
;Scan for an exact character.
;enter with al=character to find, es:di->first character to examine,
; cx=number of characters to examine, direction flag set by the caller.
;exit with zr if found (di has been stepped past the character), nz if not.
or sp,sp ;ensure NZ in case cx=0 (repne leaves the flags alone then).
repne scasb ;search for the character.
ret
scan_char_fold:
;Scan for a character, comparing through the case translate table.
;enter with al=character to find, bx->case translate table,
; es:di->first character to examine, cx=number of characters to examine,
; dx=amount added to di after each character (the callers pass 1 or -1).
;exit with zr if found (di has been stepped past the character), nz if not.
; al holds the translated search character, ah and cx are destroyed.
;The loop body is written out twice.  An odd count enters at the second copy.
xlat
mov ah,al ;ah=translated character we are looking for.
or sp,sp ;if cx=0, be sure to return nz.
jcxz scan_char_fold_2
shr cx,1 ;we unrolled the loop once.
jnc scan_char_fold_1 ;if even, start at the top.
inc cx ;otherwise, add one for the odd
jmp short scan_char_fold_3 ; iteration, and jump to it.
scan_char_fold_1:
mov al,es:[di] ;unroll this puppy once.
add di,dx
xlat
cmp al,ah ;compare them.
je scan_char_fold_2 ;if equal, we're done.
scan_char_fold_3:
mov al,es:[di] ;now do the second set.
add di,dx
xlat
cmp al,ah
loopne scan_char_fold_1 ;loop while different and count not exhausted.
scan_char_fold_2:
mov al,ah ;get our character back (mov leaves the flags alone).
ret
slowly:
;Search for the compiled pattern in outpat, one position at a time.
;enter with es:di -> first position to try, direction = inc_di or dec_di,
; end_ptr = last position to try, right_ptr = rightmost end of the region.
;return cy if no match,
; else nc, di->start of match, last_ptr->after end of match.
;If the pattern starts with a literal character (other than CR or LF, which
; can be half of a newline), control goes to forwards_0 or backwards_0, which
; scan for that character first.  Both come back to slowly_fail or
; slowly_succeed.
        cmp     di,topbot               ;at topbot already?
        jne     slowly_0
        mov     di,bottop
slowly_0:
        mov     ax,which_chr            ;does the pattern start with a CHR?
        cmp     ax,word ptr outpat
        jne     slowly_1                ;no.
        cmp     outpat+2,CR             ;searching for literal CR?
        je      slowly_1                ;yes - don't optimize because of CRLFs.
        cmp     outpat+2,LF             ;searching for literal LF?
        je      slowly_1                ;yes - don't optimize because of CRLFs.
        mov     scan_char,offset scan_char_literal
        cmp     ax,offset omatch_CHR    ;Are we folding case?
        je      quickly_1               ;no.
        mov     scan_char,offset scan_char_fold
quickly_1:
        cmp     direction,offset inc_di ;Are we going forwards?
        je      forwards_0              ;yes.
if 0 ;disable optimization for now.
        jmp     slowly_1
endif
        jmp     backwards_0             ;no.
slowly_1:
        mov     si,offset outpat        ;start at beginning of pattern.
        mov     bx,offset case_ignore_table
        push    di                      ;remember where we're starting.
        call    omatch                  ;now search.
        pop     di
        jnc     slowly_succeed          ;we found a match
;not found, should we give up?
        cmp     di,end_ptr              ;at the end yet?
        je      slowly_fail             ;yes - not found.
;not found, we have to bump di.
        call    direction
        jmp     slowly_1
slowly_fail:
        stc                             ;not found.
        ret
slowly_succeed:
if 1 ;an attempt to make backwards regexp searches work right.
        cmp     direction,offset inc_di ;Are we going forwards?
        je      slowly_done             ;yes - we're done now.
;Going backwards: keep moving the start of the match to the left for as long
; as the pattern still matches and still ends at the same place.
slowly_backwards_again:
        cmp     di,end_ptr              ;already at the leftmost position of
        je      slowly_done             ; the region?  don't probe outside it.
        call    dec_di                  ;move backwards.
        push    last_ptr                ;remember the pointer to the end of it.
        mov     si,offset outpat        ;start at beginning of pattern.
        mov     bx,offset case_ignore_table
        push    di
        call    omatch                  ;did it match?
        pop     di
        pop     ax                      ;ax=end of the match we already have.
        jc      slowly_backwards_done   ;no - we're done.
        cmp     ax,last_ptr             ;did last_ptr change?
        je      slowly_backwards_again  ;no, we can try again.
slowly_backwards_done:
        mov     last_ptr,ax             ;restore the end of the good match.
        call    inc_di                  ;point to the last match again.
slowly_done:
endif
        clc
        ret
public forwards_0
forwards_0:
;Optimized forward search, entered from slowly when the pattern starts with
; a literal character: scan for that character with scan_char, then let
; omatch check the rest of the pattern (outpat+3 onwards).
;enter with es:di -> first position to try.
;leaves through slowly_fail (cy) or slowly_succeed (di -> start of match).
mov bx,offset case_ignore_table
mov al,outpat+2 ;get the character
cmp di,bottop ;are we in the bottom?
jae forwards_2 ;yes - don't search the top.
mov cx,topbot ;should we search to topbot
cmp cx,end_ptr ; or to end_ptr?
jbe forwards_3
mov cx,end_ptr ;just to end_ptr.
forwards_3:
sub cx,di ;compute the amount left in the top.
mov dx,1 ;step forwards (used by scan_char_fold).
call scan_char ;scan for our character.
je forwards_1 ;we found it!
cmp di,end_ptr ;are we at the end?
jae slowly_fail ;yes - no match.
mov di,bottop ;no - carry on in the bottom.
forwards_2:
mov cx,end_ptr ;we only need search that far.
sub cx,di
mov dx,1
call scan_char ;scan for our character.
jne slowly_fail ;we didn't find it.
forwards_1:
;di is one past the character that scan_char found.
mov si,offset outpat+3 ;start after the first CHR of the pattern.
push di ;remember where we're starting.
call omatch ;now search.
pop di
jnc forwards_4 ;we matched - return it.
cmp di,end_ptr ;are we at the end?
jb forwards_0 ;no - keep matching.
slowly_fail_j_1:
jmp slowly_fail ;yes - no match.
forwards_4:
dec di ;remember that we actually started
jmp slowly_succeed ; one character into the pattern.
public backwards_0
backwards_0:
;Optimized backward search, entered from slowly when the pattern starts with
; a literal character: scan backwards for that character with scan_char, then
; let omatch check the rest of the pattern (outpat+3 onwards).
;enter with es:di = exclusive upper bound: the first character examined is
; the one just before di, in both halves of the buffer.  (The top-half path
; used to examine es:[di] itself while the bottom-half path examined
; es:[di-1].  A retry in the bottom half therefore skipped a character, and
; the top half could return a match starting at right_ptr.)
;leaves through slowly_fail (cy) or slowly_succeed (di -> start of match).
        mov     bx,offset case_ignore_table
        mov     al,outpat+2             ;get the character
        cmp     di,bottop               ;are we in the top?
        jb      backwards_2             ;yes - don't search the bottom.
        je      backwards_5             ;nothing left in the bottom.
        mov     si,bottop               ;should we search to bottop
        cmp     si,end_ptr              ; or to end_ptr?
        jae     backwards_3
        mov     si,end_ptr              ;just to end_ptr.
backwards_3:
        dec     di                      ;point at the first character to examine.
        mov     cx,di                   ;compute the amount left in the bottom.
        sub     cx,si
        inc     cx                      ;be sure to look at where di points.
        std
        mov     dx,-1                   ;step backwards (used by scan_char_fold).
        call    scan_char               ;scan for our character.
        cld
        je      backwards_1             ;we found it!
backwards_5:
        cmp     di,end_ptr              ;are we at the end?
        jbe     slowly_fail_j_1         ;yes - no match.
        mov     di,topbot               ;no - carry on below the end of the top.
backwards_2:
        dec     di                      ;point at the first character to examine.
        mov     cx,di                   ;we only search here if end_ptr is here.
        sub     cx,end_ptr
        inc     cx                      ;be sure to compare where di is.
        std
        mov     dx,-1
        call    scan_char               ;scan for our character.
        cld
        jne     slowly_fail_j_1         ;we didn't find it.
backwards_1:
;scan_char post-decremented: di is one below the character it found.
        mov     si,offset outpat+3      ;start after the first CHR of the pattern.
        push    di                      ;remember where we're starting.
        add     di,2                    ;point just past the character found.
        call    omatch                  ;now search.
        pop     di
        jnc     backwards_4             ;we succeeded.
        inc     di                      ;di -> the candidate that just failed,
                                        ; which is the new exclusive bound.
        cmp     di,end_ptr              ;anything left to the left of it?
        jbe     slowly_fail_j_1         ;no - no match.
        jmp     backwards_0
backwards_4:
        inc     di                      ;remember that we post-decremented,
        jmp     slowly_succeed          ; so we're one character too far.
inc_di:
;bump di forwards by one position.  A LINENEW (two bytes) is stepped over as
; a unit, and stepping onto topbot continues at bottop.
;enter and exit with es:di -> text.  Flags are destroyed.
inc di
cmp di,topbot ;at bottom of top?
je inc_di_1 ;yes - can't possibly be split over newline.
cmp es:[di-1].w,LINENEW ;did we just move into a newline?
jne inc_di_2 ;no.
inc di ;yes - skip LF part of newline.
cmp di,topbot ;at topbot already?
jne inc_di_2
inc_di_1:
mov di,bottop
inc_di_2:
ret
dec_di:
;Step di backwards by one position.  Stepping back from bottop continues
; below topbot, and a LINENEW (two bytes) is stepped over as a unit.
;enter and exit with es:di -> text.  Flags are destroyed.
        cmp     di,bottop               ;sitting on the first byte of the bottom?
        jne     dec_di_back
        mov     di,topbot               ;yes - the previous byte ends the top.
dec_di_back:
        dec     di                      ;back up to the previous byte.
        cmp     di,bottop               ;did we land on the first byte of the bottom?
        je      dec_di_exit             ;yes - a newline can't straddle the two halves.
        cmp     es:[di-1].w,LINENEW     ;are we on the second byte of a newline?
        jne     dec_di_exit             ;no.
        dec     di                      ;yes - move to the start of the newline.
dec_di_exit:
        ret
omatch:
;Match the compiled pattern at ds:si against the text at es:di.
;return nc if we matched, cy if not.
;es:di -> source text
;ds:si -> pattern
;bx -> case translate table (used by the routines that xlat).
;The pattern is a list of word offsets of omatch_XXX routines.  Each one is
; fetched and called in turn until one of them fails (cy), or until one of
; them (omatch_EOS on a complete match) discards its return address and
; returns straight to our caller.
omatch_0:
cmp di,topbot ;at bottom of top?
jne omatch_1
mov di,bottop ;yes, go to top of bottom.
omatch_1:
lodsw ;ax=next omatch_XXX routine, si->its parameters.
call ax
jnc omatch_0 ;it matched - go on with the next one.
ret
;each of the omatch_XXX routines operates under the following constraints
; on failure, return with cy set.
; on matching (only used by omatch_EOS right now), return to caller's caller
; with cy clear.
; on success, bump si as needed so that it points to the next omatch,
; bump di as needed (either zero or one), and return with cy clear.
public omatch_EOS
omatch_EOS:
;End of the pattern: everything matched.  Record where the match ends and
; return nc to the caller of omatch, skipping omatch's fetch loop.
mov last_ptr,di ;remember the last thing we matched.
add sp,2 ;pop our return address.
clc ;if we get to the end of the
ret ; pattern, then we matched.
public omatch_CLO
omatch_CLO:
;Closure ('*'): match zero or more of the pattern element that follows this
; opcode, then the rest of the pattern.
;First the element is matched as many times as it will go.  Then the rest of
; the pattern is tried, backing off one byte at a time until it matches or
; we are back before the starting point.
;On a match, returns nc to the caller of omatch (our own return address is
; discarded).  On failure, returns cy.
;NOTE(review): the back-off moves one byte at a time, while some elements
; advance two bytes over a LINENEW -- confirm this cannot land mid-newline.
push di ;save the first closure pattern.
mov CLO_si,si ;remember the pattern we're closing.
;Note that we don't have to worry about CLO_si being global because the
; next pattern can't be another closure.
;match as many as fit the next pattern
mov bx,offset case_ignore_table
omatch_CLO_1:
mov si,CLO_si ;get the pattern being closed.
cmp di,topbot ;at bottom of top?
jne omatch_CLO_5
mov di,bottop ;yes, go to top of bottom.
omatch_CLO_5:
lodsw
call ax
jnc omatch_CLO_1
;the element finally failed, leaving si after it (at the rest of the pattern).
pop bx ;bx=where the closure started.
;match only as many as fit the pattern after the next pattern.
omatch_CLO_2:
push si
push di
push bx
mov bx,offset case_ignore_table
call omatch ;try to match rest of pattern.
pop bx
pop di
pop si
jnc omatch_CLO_4 ;go if it matched.
cmp di,bottop ;backing up past the point?
jne omatch_CLO_3 ;no - just decrement.
mov di,topbot ;yes - get the bottom of the top.
omatch_CLO_3:
dec di ;point to the previous character.
cmp di,bx ;zero or more matches still?
jae omatch_CLO_2 ;yes.
stc ;no matches--return no match.
ret
omatch_CLO_4:
pop bx ;get rid of our return address (pop leaves nc alone).
ret
omatch_PAREN:
;Start of a \( \) group.  The group is matched by a nested call of omatch,
; which comes back with nc when it reaches the omatch_EOS that escaped_char
; stores for \).
;On success, returns nc with si and di as the nested omatch left them.
;On failure, returns cy with si and di restored.
push si
push di
mov bx,offset case_ignore_table
call omatch ;try to match rest of pattern.
jnc omatch_PAREN_1 ;go if it matched.
pop di
pop si
ret
omatch_PAREN_1:
add sp,4 ;get rid of si and di.
;guaranteed nc.
ret
omatch_OR:
;Alternation (\|).  The opcode is followed by one word, a pointer to the
; second alternative.  The first alternative follows that word and ends in
; its own omatch_EOS (see stor).
;Try the first alternative.  If it fails, try the second from the same text
; position.
;On a match, returns nc to the caller of omatch (our own return address is
; discarded).  On failure, returns cy with si and di restored.
add si,2 ;skip past our param.
push si
push di
mov bx,offset case_ignore_table
call omatch ;try to match rest of pattern.
jnc omatch_OR_1 ;go if it matched.
pop di
pop si
push si
mov si,[si-2] ;point to the next or-clause.
push di
call omatch
jnc omatch_OR_1 ;go if it matched.
pop di
pop si
;guaranteed cy.
ret
omatch_OR_1:
add sp,6 ;get rid of si,di, and our return addr.
;guaranteed nc.
ret
public omatch_CHR
omatch_CHR:
;Match one literal character exactly.  The opcode is followed by the
; character.
;A LINENEW in the text only matches when the pattern holds CR here and the
; next pattern element is another character opcode holding LF.  Both
; elements and both text bytes are then consumed together.
;omatch_CHR_linenew, omatch_CHR_skip, omatch_CHR_no and omatch_yes are also
; jumped to from other omatch_XXX routines.
cmp di,right_ptr ;are we at the end?
je omatch_CHR_skip ;yes - we never match CHR
cmp es:[di].w,LINENEW
je omatch_CHR_linenew
cmpsb ;compare, stepping both si and di.
je omatch_yes ;if they're the same, match again.
dec di ;don't modify buffer pointer if no match.
stc
ret
omatch_CHR_linenew:
cmp [si].b,CR ;got a LINENEW, are we looking for one?
jne omatch_CHR_skip ;no.
mov ax,which_chr ;is the next one another char?
cmp [si+1].w,ax
jne omatch_CHR_skip ;no - no match.
cmp [si+1+2].b,LF ;Are we really looking for a linenew?
jne omatch_CHR_skip ;no - no match.
add si,1+2+1 ;skip past the two of them.
add di,2 ;skip in the buffer also.
clc
ret
omatch_CHR_skip:
inc si ;skip the pattern character.
omatch_CHR_no:
stc
ret
omatch_yes:
clc
ret
public omatch_NCHR
omatch_NCHR:
;Match one literal character, comparing through the case translate table.
;The opcode is followed by the character.  bx -> case translate table.
;A LINENEW in the text is handled by omatch_CHR_linenew.
cmp di,right_ptr ;are we at the end?
je omatch_CHR_skip ;yes - we never match CHR
cmp es:[di].w,LINENEW
je omatch_CHR_linenew
lodsb ;get the pattern character,
xlat ; translated.
mov ah,al
mov al,es:[di] ;get the text character,
inc di
xlat ; translated.
cmp ah,al
je omatch_yes ;if they're the same, match again.
dec di ;don't modify buffer pointer if no match.
stc
ret
omatch_NL:
;Match a newline (\n): succeeds when the text holds a LINENEW at di, and
; steps di over both of its bytes.  No parameters.
cmp di,right_ptr ;are we at the end?
je omatch_NL_no ;yes - we never match newline.
cmp es:[di].w,LINENEW ;is it newline?
jne omatch_NL_no ;no - don't match it.
add di,2 ;yes - skip it.
clc
ret
omatch_NL_no:
stc
ret
public omatch_BOB
omatch_BOB:
;match beginning of buffer (\`).  Consumes no text.  No parameters.
cmp di,toptop ;are we at the beginning of the buffer?
je omatch_yes ;yes.
stc
ret
public omatch_BOL
omatch_BOL:
;match beginning of line (^): succeeds at the beginning of the buffer or
; just after a LINENEW.  Consumes no text.  No parameters.
push di ;we might have to look at the top.
cmp di,bottop ;are we at the point?
jne omatch_BOL_1 ;no - the text before di is next to it.
mov di,topbot ;yes - the text before us ends the top.
omatch_BOL_1:
cmp di,toptop ;beginning of the buffer?
je omatch_BOL_2 ;yes - that counts as beginning of line.
cmp es:[di-2].w,LINENEW ;does a newline come just before us?
pop di ;(pop leaves the flags alone.)
jne omatch_CHR_no
clc
ret
omatch_BOL_2:
pop di
clc
ret
public omatch_ISW
omatch_ISW:
;match word character (\w) and step over it.  Never matches at the end of
; the buffer or on a LINENEW.  No parameters.
;NOTE(review): the limit tested here is botbot, where omatch_CHR and
; omatch_ANY test right_ptr -- confirm that is intended.
cmp di,botbot
je omatch_CHR_no
cmp es:[di].w,LINENEW
je omatch_CHR_no
call chars_around_di
test al,1 ;word character?
je omatch_CHR_no ;nope--no match.
inc di ;match the character.
clc
ret
public omatch_NOW
omatch_NOW:
;match NOt Word character (\W) and step over it.  A LINENEW counts as a
; non-word character and is stepped over as a unit.  Never matches at the
; end of the buffer.  No parameters.
cmp di,botbot
je omatch_no
cmp es:[di].w,LINENEW
je omatch_NOW_1
call chars_around_di
test al,1 ;is the character at di a word character?
jne omatch_no ;yes--no match.
inc di ;match the character.
clc
ret
omatch_NOW_1:
add di,2 ;skip past the newline,
clc ; and match.
ret
public omatch_BOW
omatch_BOW:
;match beginning of word (\<): a non-word character (or nothing) before di
; and a word character at di.  Consumes no text.  No parameters.
cmp di,botbot
je omatch_no
cmp es:[di].w,LINENEW
je omatch_no
call chars_around_di
cmp al,1 ;whitespace before and word after?
jne omatch_no ;nope--no match.
clc
ret
public omatch_EOW
omatch_EOW:
;match end of word (\>): a word character before di and a non-word
; character (or nothing) at di.  Consumes no text.  No parameters.
call chars_around_di
cmp al,2 ;word before and whitespace after?
jne omatch_no ;nope--no match.
clc
ret
public omatch_WOR
omatch_WOR:
;match a word boundary (\b): either the beginning or the end of a word.
; Consumes no text.  No parameters.
call chars_around_di
cmp al,2 ;word before and whitespace after?
je omatch_WOR_yes ;yes - match.
cmp al,1 ;whitespace before and word after?
je omatch_WOR_yes ;yes - match.
stc
ret
omatch_WOR_yes:
clc
ret
public omatch_NWR
omatch_NWR:
;match anywhere that is not a word boundary (\B): the characters on both
; sides of di are of the same kind.  Consumes no text.  No parameters.
;omatch_NWR_yes is also jumped to from omatch_EOB.
call chars_around_di
cmp al,0 ;whitespace before and whitespace after?
je omatch_NWR_yes ;yes - match.
cmp al,3 ;word before and word after?
je omatch_NWR_yes ;yes - match.
stc
ret
omatch_NWR_yes:
clc
ret
public omatch_EOB
omatch_EOB:
;match end of buffer (\').  Consumes no text.  No parameters.
cmp di,botbot ;are we at the end of the buffer?
je omatch_NWR_yes ;yes.
stc
ret
public omatch_EOL
omatch_EOL:
;match end of line ($): succeeds at the end of the buffer or when a LINENEW
; starts at di.  Consumes no text.  No parameters.
;omatch_no is the shared "return cy" exit of several omatch_XXX routines.
cmp di,botbot ;are we at the end?
je omatch_EOL_yes ;yes.
cmp es:[di].w,LINENEW
jne omatch_no
omatch_EOL_yes:
clc
ret
omatch_no:
stc
ret
public omatch_ANY
omatch_ANY:
;match any single character (.) except a newline, and step over it.
; No parameters.
cmp di,right_ptr ;are we at the end?
je omatch_no ;yes - we never match ANY
cmp es:[di].w,LINENEW ;we never match EOL.
je omatch_no
inc di
clc
ret
public omatch_CCL
omatch_CCL:
;match a character class ([...]).  The opcode is followed by a count byte
; and that many member characters.  bx -> case translate table.
;A LINENEW in the text only matches a class whose first two members are the
; two bytes of a LINENEW.
;On success di is stepped over the text matched.  On both success and
; failure si is left after the class.
cmp di,right_ptr ;are we at the end?
je omatch_ccl_no ;yes - we never match CCL
cmp es:[di].w,LINENEW ;is a newline next in the text?
je omatch_ccl_newline ;yes - that needs the special check below.
call locate ;see if it's in our set.
jnz omatch_no ;nope.
inc di
clc
ret
omatch_ccl_newline:
lea ax,[di+1] ;are we near the end?
cmp ax,right_ptr
je omatch_ccl_no ;yes - no match.
cmp ds:[si+1].w,LINENEW ;does the class begin with crlf?
jne omatch_ccl_no ;no - don't match it.
lodsb ;skip past this pattern.
xor ah,ah
add si,ax
add di,2
clc
ret
public omatch_NCCL
omatch_NCCL:
;match one character that is not in a character class ([^...]).  The opcode
; is followed by a count byte and that many member characters.
; bx -> case translate table.
;On both success and failure si is left after the class.
;omatch_ccl_no (skip the class, return cy) is shared with omatch_CCL.
cmp di,right_ptr ;are we at the end?
je omatch_ccl_no ;yes - we never match NCCL
cmp es:[di].w,LINENEW ;a newline in the text never matches here.
je omatch_ccl_no
call locate ;see if it's in our set.
jz omatch_no ;yes - we don't match.
inc di
clc
ret
omatch_ccl_no:
lodsb ;skip past the pattern.
xor ah,ah
add si,ax
stc
ret
locate:
;See whether the character at es:di is a member of a character class.
;es:di -> search string, bx -> case translate table.
;ds:si -> CCL (a count byte followed by that many characters)
;exit with zr if found, nz if not found, si -> after the pattern.
;Both the text character and the class members go through the translate
; table before they are compared.  ax is destroyed, cx is preserved.
push cx
lodsb ;get the count.
mov cl,al
xor ch,ch
jcxz locate_1 ;if empty class, it doesn't match.
mov al,es:[di] ;get the character we're trying to match.
xlat ;case translate it.
mov ah,al ;keep it somewhere safe.
locate_2:
lodsb
xlat
cmp al,ah ;is this one it?
loopne locate_2
lahf ;remember whether or not we found it.
add si,cx ;skip the members we didn't look at.
sahf
pop cx
ret
locate_1:
or sp,sp ;return nz.
pop cx
ret
chars_around_di:
;Classify the characters on both sides of di.
;return al bit 1=syntax of char to left of point.
; al bit 0=syntax of char to right of point.
;A bit is 1 when bit 0 of get_syntax's result is set for that character
; (presumably "is a word character" -- confirm against get_syntax), and 0
; when there is no character on that side (beginning or end of the buffer).
;ah is destroyed, di is preserved.
push di ;get the character before point.
cmp di,bottop ;are we at the point?
jne chars_around_di_1 ;no - the character before us is next to us.
mov di,topbot ;yes - it is the last one of the top.
chars_around_di_1:
xor al,al ;if no character, it's whitespace.
cmp di,toptop
je chars_around_di_2
mov al,es:[di-1]
call get_syntax ;get the syntax for the char before point.
and al,1 ;isolate the 'word' bit.
chars_around_di_2:
shl al,1
mov ah,al
pop di
xor al,al ;if no character, it's whitespace.
cmp di,botbot ;are we at the end?
je chars_around_di_3 ;yes - can't match beginning of word.
mov al,es:[di]
call get_syntax
and al,1
chars_around_di_3:
or al,ah ;include the syntax of the char to left of point.
ret
assume ds:data
public set_pattern
set_pattern:
;Compile a search pattern into outpat.
;enter with si, cx->pattern. dx<>0 if regular expression. di <> 0 if we
; want to fold case.
;exit with cy=1 if error.  outpat then holds the empty pattern (a lone
; omatch_EOS) rather than an unterminated one.
;A regular expression is handed to regexp_pat.  A plain string becomes one
; character opcode (omatch_CHR or omatch_NCHR) plus the character for every
; byte of the pattern, followed by omatch_EOS.
;NOTE(review): stosw and movsb store through es, so es presumably addresses
; the data segment on entry -- confirm against the callers.
        call    init_case               ;build the case table (first call only).
        mov     ax,offset omatch_CHR
        or      di,di                   ;folding case?
        je      set_pattern_0           ;no.
        mov     ax,offset omatch_NCHR
set_pattern_0:
        mov     which_chr,ax            ;remember which omatch_CHR to use.
        or      dx,dx
        jne     regexp_pat
        mov     di,offset outpat
        jcxz    set_pattern_1
;Each character takes three bytes and the closing omatch_EOS takes two, so a
; character may only be stored while at least five bytes are left.  (The
; limit used to be outpat+OUTPATSIZE-2, which let the closing omatch_EOS be
; stored up to two bytes past the end of outpat.)
        mov     bp,offset outpat-5
        add     bp,OUTPATSIZE
set_pattern_2:
        cmp     di,bp                   ;do we have enough room?
        ja      set_pattern_3           ;no - quit now.
        stosw                           ;store the appropriate comparison omatcher.
        movsb
        loop    set_pattern_2
set_pattern_1:
        mov     ax,offset omatch_EOS    ;store the end of string.
        stosw
        clc
        ret
set_pattern_3:
        mov     word ptr outpat,offset omatch_EOS ;too long -- null it, as
        stc                             ; regexp_pat does for bad patterns.
        ret
public regexp_pat
regexp_pat:
;Compile a regular expression into outpat.
;enter with si, cx->pattern.
;exit with cy=1 if error.  outpat then holds the empty pattern (a lone
; omatch_EOS).
;A zero byte is stored at [si+cx], just past the pattern, so the caller's
; buffer must have room for it.  bx is destroyed.
mov bx,cx
mov [si+bx],byte ptr 0 ;store the terminating null.
call makepat
jnc regexp_pat_1
mov word ptr outpat,offset omatch_EOS ;uh-oh, bad pattern -- null it.
regexp_pat_1:
ret
makepat:
;Compile the regular expression at ds:si into outpat.
;si -> source pat (null terminated)
;While compiling: di -> next free byte of outpat, dx -> first byte after
; outpat, this_pattern / last_pattern -> the element being compiled and the
; one before it (what a '*' closes).
;return cy=1 if error (unbalanced \( \), unterminated [ ], or outpat is
; full), cy=0 if outpat holds a complete pattern.
        mov     inpat_ptr,si
        mov     di,offset outpat
        mov     dx,OUTPATSIZE
        add     dx,di
        mov     last_pattern,-1         ;there is no previous pattern yet.
        mov     last_or,di              ;remember that it's here.
        mov     paren_count,0           ;start with no parens.
makepat_1:
        lodsb                           ;get the first character.
        or      al,al                   ;end of string?
        je      makepat_0               ;yes.
        mov     this_pattern,di         ;remember where this pattern starts.
        cmp     al,'\'                  ;are we escaping something?
        jne     makepat_a
        cmp     byte ptr [si],0         ;is the '\' at the end?
        je      makepat_9               ;yes - just use \.
        lodsb                           ;get the escaped char.
        call    escaped_char            ;check for the special escapes.
        jmp     makepat_2
makepat_a:
        cmp     al,'.'
        jne     makepat_3
        mov     ax,offset omatch_ANY
        call    addset
        jmp     makepat_2
;this really belongs at the end of makepat, but the short jump can't get there.
makepat_0:
        mov     ax,offset omatch_EOS
        call    addset
        cmp     paren_count,0           ;did we match all the parens?
        jne     makepat__0_2            ;no, it's bad.
        cmp     di,dx                   ;did we fill up the space?
        jb      makepat__0_1            ;no.  (at or past the end both count
                                        ; as full.)
makepat__0_2:
        stc                             ;yes, it's bad.
        ret
makepat__0_1:
        clc
        ret
makepat_3:
        cmp     al,'^'
        jne     makepat_7
        lea     ax,[si-1]               ;get the buffer pointer.
        cmp     ax,inpat_ptr            ;are we at the beginning?
        jne     makepat_6               ;no - this can't be it.
        mov     ax,offset omatch_BOL
        call    addset
        jmp     makepat_2
makepat_6:
        mov     al,'^'
        call    addchar
        jmp     makepat_2
makepat_7:
        cmp     al,'$'
        jne     makepat_8
        cmp     word ptr [si],'\' + '|'*256;is the '$' at the end of an alternation?
        je      makepat_7a              ;yes - it means end of line.
        cmp     byte ptr [si],0         ;is the '$' at the end?
        jne     makepat_9               ;no - not special.
makepat_7a:
        mov     ax,offset omatch_EOL
        call    addset
        jmp     makepat_2
makepat_9:
        call    addchar
        jmp     makepat_2
makepat_8:
        cmp     al,'['
        jne     makepat_10
        call    getccl
        jnc     makepat_2
;Bad character class.  Nothing of ours is on the stack here.  (A stray
; "pop di" used to discard our return address at this point, so regexp_pat
; never got the chance to null the half-built pattern.)
        stc
        ret
makepat_10:
        cmp     al,'*'
        jne     makepat_11
        cmp     last_pattern,-1         ;is there a previous pattern?
        je      makepat_13              ;no - not closure.
        mov     bx,last_pattern
        mov     ax,word ptr [bx]
        cmp     ax,offset omatch_CLO    ;trying to close a closure?
        je      makepat_13              ;yes - not closure.
        cmp     ax,offset omatch_BOL    ;trying to close a beginning of line?
        je      makepat_13              ;yes - not closure.
        cmp     ax,offset omatch_EOS    ;trying to close a \) ?
        je      makepat_13              ;yes - omatch_EOS unwinds the stack,
                                        ; so it can't run inside omatch_CLO.
        cmp     ax,offset omatch_PAREN  ;trying to close a \( ?
        je      makepat_13              ;yes - not closure.
        call    stclos
        mov     this_pattern,bx         ;remember where this one was.
        jmp     makepat_2
makepat_13:
        mov     al,'*'                  ;ax was used above - get the '*' back so
                                        ; that it is stored as itself.
makepat_11:
;put more characters here.
makepat_12:
        call    addchar
makepat_2:
        mov     bx,this_pattern
        mov     last_pattern,bx
        jmp     makepat_1
escaped_char:
;Compile the character that followed a '\'.
;enter with al = the escaped character, si -> the source after it,
; di -> dest, dx -> end of dest.
;The special escapes store their opcode; \( and \) also keep paren_count up
; to date; \| wraps what has been compiled since the last \| in an omatch_OR
; (see stor).  Any other character is stored as itself by addchar.
;ax, bx and cx are destroyed.
        mov     cx,offset omatch_NL
        cmp     al,"n"                  ;newline?
        je      escaped_1
        mov     cx,offset omatch_BOB
        cmp     al,"`"                  ;beginning of buffer?
        je      escaped_1
        mov     cx,offset omatch_EOB
        cmp     al,"'"                  ;end of buffer?
        je      escaped_1
        mov     cx,offset omatch_WOR
        cmp     al,"b"                  ;beginning or end of word?
        je      escaped_1
        mov     cx,offset omatch_NWR
        cmp     al,"B"                  ;not beginning nor end of word?
        je      escaped_1
        mov     cx,offset omatch_BOW
        cmp     al,"<"                  ;beginning of word?
        je      escaped_1
        mov     cx,offset omatch_EOW
        cmp     al,">"                  ;end of word?
        je      escaped_1
        mov     cx,offset omatch_ISW
        cmp     al,"w"                  ;word character?
        je      escaped_1
        mov     cx,offset omatch_NOW
        cmp     al,"W"                  ;not word character?
        je      escaped_1
        inc     paren_count             ;increase the paren count.
        mov     cx,offset omatch_PAREN
        cmp     al,"("                  ;start sub-regexp?
        je      escaped_1
        add     paren_count,-2          ;decrease the paren count.
        mov     cx,offset omatch_EOS
        cmp     al,")"                  ;stop sub-regexp?
        je      escaped_1
        inc     paren_count             ;oops, not a paren.
        cmp     al,'|'                  ;is this an "or" operator?
        jne     addchar                 ;no.
        mov     inpat_ptr,si            ;start a new regexp here...
        call    stor                    ;store a "or" operator.
;stor has moved the compiled pattern, so this_pattern no longer points at the
; start of an element.  Tell makepat there is nothing for a '*' to close.
        mov     this_pattern,-1
        ret
escaped_1:
        mov     ax,cx
        call    addset
        ret
addchar:
;Store a literal character: the current character opcode (which_chr)
; followed by the character itself.
;al = CHR to put, di->dest, dx->end of dest.  ax is preserved.
        push    ax                      ;keep the character while ax holds the opcode.
        mov     ax,which_chr            ;use the right omatch_chr.
        call    addset
        pop     ax
        jmp     addbyte                 ;store the character; addbyte returns for us.
addset: ;only command chars call addset.
;Store the word in ax, low byte first.
;ax = word to put, di->dest, dx->end of dest.  ax is preserved.
        call    addbyte                 ;the low byte,
        push    ax
        mov     al,ah
        call    addbyte                 ; then the high byte.
        pop     ax
        ret
addbyte:
;Store one byte in the compiled pattern if there is room for it.
;al = char to put, di->dest, dx->end of dest.
;When there is no room nothing is stored and di is left alone.  makepat
; notices the full buffer when it has finished.
;The test is "at or past the end": stclos and stor move di in bigger steps,
; so di may already have passed dx, and an equality test would let every
; following byte be stored beyond the buffer.
        cmp     di,dx
        jae     addbyte_1               ;no room.
        mov     [di],al
        inc     di
addbyte_1:
        ret
stclos:
;Turn the last pattern element into a closure: move it up two bytes and put
; omatch_CLO in front of it.
;di->last set added + 1
;bx->the element being closed (bx < di)
;dx->end of dest
;exit with di->new end of the pattern.  ax is destroyed.
;If the two extra bytes don't fit, nothing is moved and di is set to dx.
; That marks the buffer as full, which makepat reports as an error when it
; has finished.
        lea     ax,[di+2]               ;where the pattern would end.
        cmp     ax,dx                   ;does it still fit?
        ja      stclos_full             ;no.
        push    di
stclos_1:
        dec     di                      ;move the element up, last byte first.
        mov     al,[di]
        mov     [di+2],al
        cmp     di,bx
        jne     stclos_1
stclos_2:
        mov     word ptr [bx],offset omatch_CLO
        pop     di
        add     di,2
        ret
stclos_full:
        mov     di,dx                   ;mark the buffer as full.
        ret
stor:
;Store an "or" (\|): everything compiled since last_or becomes the first
; alternative.  It is moved up four bytes, omatch_OR and a pointer to the
; second alternative are put in front of it, and omatch_EOS is put after it.
;di->last set added + 1
;dx->end of dest
;exit with di->where the second alternative will be compiled, and last_or
; set to the same place.  ax and bx are destroyed.
;If the six extra bytes don't fit, nothing is stored and di is set to dx.
; That marks the buffer as full, which makepat reports as an error when it
; has finished.
        mov     bx,last_or
        lea     ax,[di+6]               ;where the pattern would end.
        cmp     ax,dx                   ;does it still fit?
        ja      stor_full               ;no.
        push    di
        cmp     di,bx                   ;is the first alternative empty?
        je      stor_2                  ;yes - nothing to move.  (the loop below
                                        ; decrements before it compares, so it
                                        ; would run through the whole segment.)
stor_1:
        dec     di                      ;move the alternative up, last byte first.
        mov     al,[di]
        mov     [di+4],al
        cmp     di,bx
        jne     stor_1
stor_2:
        pop     di                      ;get the new last set.
        add     di,4
        mov     ax,offset omatch_EOS    ;store the end of string.
        stosw
        mov     word ptr [bx],offset omatch_OR
        mov     [bx+2],di               ;remember where the next starts.
        mov     last_or,di
        ret
stor_full:
        mov     di,dx                   ;mark the buffer as full.
        ret
getccl:
;Compile a character class.  The '[' has already been read.
;Stores omatch_CCL (or omatch_NCCL when the class starts with '^'), a count
; byte, and the members of the class as expanded by dodash.
;si -> source (null terminated)
;di -> dest, dx -> end of dest
;return cy=1 if error (no closing ']', or more than 255 members).
;ax is destroyed, bx is preserved.
        lodsb
        cmp     al,'^'
        jne     getccl_1
        mov     ax,offset omatch_NCCL
        call    addset
        jmp     getccl_2
getccl_1:
        dec     si                      ;not a '^' - it belongs to the class.
        mov     ax,offset omatch_CCL
        call    addset
getccl_2:
        push    bx
        mov     bx,di                   ;bx -> the count byte.
        call    addbyte                 ;leave room for count
        call    dodash
        mov     ax,di
        sub     ax,bx                   ;ax = count byte + members stored.
        jz      getccl_4                ;zero - outpat was already full, so the
                                        ; count byte was never stored and bx
                                        ; points past the end.  makepat will
                                        ; report the full buffer.
        dec     ax                      ;ax = number of members.
        or      ah,ah                   ;does it fit in the count byte?
        jnz     getccl_5                ;no - bad pattern.
        mov     [bx],al
getccl_4:
        pop     bx
        lodsb
        cmp     al,']'                  ;now make sure that we end in ']'.
        je      getccl_3                ;yup, we do.
        dec     si                      ;make si -> the null.
        stc
        ret
getccl_3:
        clc
        ret
getccl_5:
        pop     bx
        stc
        ret
dodash:
;Copy the members of a character class to the compiled pattern, expanding
; ranges such as a-z.
;si -> source pattern (null terminated)
;di -> destination pattern
;dx -> end of destination pattern
;exit with si -> the ']' or the null that ended the class.
;A '-' is a range operator only between two alphanumeric characters that are
; in increasing order.  Anywhere else it stands for itself.
;ax is destroyed, bx is preserved.
        push    bx
        mov     bx,si                   ;remember where the class starts.
dodash_1:
        lodsb
        or      al,al
        je      dodash_2
        cmp     al,']'
        je      dodash_2
        cmp     al,'-'
        je      dodash_4
        call    addbyte
        jmp     dodash_1
dodash_4:
;si has already been moved past the '-', so the '-' was the first character
; of the class when si is one beyond the start.
        lea     ax,[bx+1]
        cmp     si,ax                   ;'-' at beginning?
        je      dodash_5
        cmp     [si].b,0                ;or '-' at end of the source?
        je      dodash_5
        cmp     [si].b,']'              ;or '-' at end of the class?
        jne     dodash_6
dodash_5:
        mov     al,'-'                  ;if at beginning or at end, just a '-'
        call    addbyte
        jmp     dodash_1
dodash_6:
        mov     al,[si-2]               ;in increasing alphabetic order?
        cmp     al,[si]
        ja      dodash_5                ;no - forget it.
        call    alphanumeric            ;left char alphanumeric?
        jnc     dodash_5                ;no - forget it.
        mov     al,[si]
        call    alphanumeric            ;right char alphanumeric?
        jnc     dodash_5                ;no - forget it.
        mov     al,[si-2]
dodash_7:
        inc     al                      ;pre-increment -- the first one's there.
        cmp     al,[si]
        ja      dodash_9
        call    addbyte
        jmp     dodash_7
dodash_9:
        inc     si                      ;skip the right end of the range.
        jmp     dodash_1
dodash_2:
        dec     si                      ;point back at the terminator.
        pop     bx
        ret
alphanumeric:
;Is al one of '0'..'9', 'A'..'Z' or 'a'..'z'?
;return cy=1 if al is alphanumeric, cy=0 if not.  al is preserved.
;The three ranges are tested from the highest one down.
        cmp     al,'z'
        ja      alphanumeric_no         ;above the lower case letters.
        cmp     al,'a'
        jae     alphanumeric_yes        ;a lower case letter.
        cmp     al,'Z'
        ja      alphanumeric_no         ;between 'Z' and 'a'.
        cmp     al,'A'
        jae     alphanumeric_yes        ;an upper case letter.
        cmp     al,'9'
        ja      alphanumeric_no         ;between '9' and 'A'.
        cmp     al,'0'
        jae     alphanumeric_yes        ;a digit.
alphanumeric_no:
        clc
        ret
alphanumeric_yes:
        stc
        ret
init_case_table:
;Build case_ignore_table: every byte translates to itself, except that
; 'a'..'z' translate to 'A'..'Z'.
;Reached through init_case, which is re-pointed at init_case_2 (a bare ret)
; here, so the table is only built on the first call.
;al is destroyed, bx is preserved.
        push    bx
        mov     init_case,offset init_case_2
        xor     bx,bx
init_case_identity:
        mov     case_ignore_table[bx],bl ;each byte starts out as itself.
        inc     bl
        jnz     init_case_identity      ;until bl wraps around to zero.
;now translate 'a' to 'A'.
        mov     bx,'a'
init_case_letters:
        mov     al,bl
        and     al,0dfh                 ;clearing bit 5 gives the upper case letter.
        mov     case_ignore_table[bx],al
        inc     bx
        cmp     bx,'z'+1
        jb      init_case_letters
        pop     bx
init_case_2:
        ret
code ends
end